/*
 * Author: Sylvain Bertrand <sylvain.bertrand@gmail.com>
 * Licensed under the GNU GPLv2
 * Copyright 2012-2014
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/fs.h>
#include <linux/irq.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/ioctl.h>
#include <linux/dma-direction.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm.h>

#include <alga/rng_mng.h>
#include <uapi/alga/pixel_fmts.h>
#include <alga/timing.h>
#include <uapi/alga/amd/dce6/dce6.h>
#include <uapi/alga/amd/si/ioctl.h>

#include <uapi/alga/amd/si/pkt.h>

#include "../mc.h"
#include "../rlc.h"
#include "../ih.h"
#include "../fence.h"
#include "../ring.h"
#include "../dmas.h"
#include "../ba.h"
#include "../cps.h"
#include "../gpu.h"
#include "../drv.h"

#include "fops.h"
#include "mmap.h"

/*
 * Translate a userland cpu virtual address into its gpu address inside the
 * aperture, by locating the BA_MAP_USER_SG mapping whose vma backs it.
 *
 * dev:			the pci device owning the ba mappings
 * aperture_gpu_addr:	out, gpu address corresponding to cpu_addr
 * cpu_addr:		userland virtual address to translate
 * vma:			out, the vma found to contain cpu_addr
 *
 * Returns 0 on success, NO_VMA_FOUND or NO_BA_MAP_FOUND on failure.
 *
 * NOTE(review): find_vma() on current->mm normally requires mmap_sem to be
 * held by the caller — confirm the ioctl path entering here does hold it.
 */
long cpu_addr_to_aperture_gpu_addr(struct pci_dev *dev,
				u64 *aperture_gpu_addr, void __iomem *cpu_addr,
						struct vm_area_struct **vma)
{
	struct ba_map *pos;
	struct ba_map *m;
	struct dev_drv_data *dd;
	u64 of;
	long r;

	/* only vmas we populated (vm_private_data set) are candidates */
	*vma = find_vma(current->mm, (unsigned long)cpu_addr);
	if (*vma == NULL || (*vma)->vm_private_data == NULL) 
			return NO_VMA_FOUND;

	r = 0;

	dd = pci_get_drvdata(dev);

	m = NULL;
	down_read(&dd->ba.maps_sem);
	/*
	 * XXX: vma can be split? (due to partial mmaping) If so, vm_start is
	 * not reliable to lookup for the right mapping.
	 */
	list_for_each_entry(pos, &dd->ba.maps, n) {
		if (pos->type != BA_MAP_USER_SG)
			continue;
		/* match the mapping by its userland base address */
		if ((u64)pos->cpu_addr == (*vma)->vm_start) {
			m = pos;
			break;
		}
	}
	if (m == NULL) {
		r = NO_BA_MAP_FOUND;
		goto unlock_maps_sem;
	}

	/* same offset inside the gpu aperture as inside the vma */
	of = (u64)((u64)cpu_addr - (*vma)->vm_start);
	*aperture_gpu_addr = m->gpu_addr + of;

unlock_maps_sem:
	up_read(&dd->ba.maps_sem);
	return r;
}

/* we may have to free partially the cpu_ps array */
/* Free the first n pages of the cpu_ps array (supports partial rollback). */
static void cpu_ps_free(struct vma_private_data *vma_data, unsigned long n)
{
	unsigned long i;

	for (i = 0; i < n; ++i)
		__free_page(vma_data->cpu_ps[i]);
}


/* Number of cpu pages the vma spans (vm_start/vm_end are page aligned). */
static unsigned long vma_cpu_ps_count(struct vm_area_struct *vma)
{
	unsigned long first_pfn = (vma->vm_start & PAGE_MASK) >> PAGE_SHIFT;
	unsigned long last_pfn = (vma->vm_end & PAGE_MASK) >> PAGE_SHIFT;

	return last_pfn - first_pfn;
}

static void cpu_bus_mapping_remove(struct vm_area_struct *vma)
{
	struct vma_private_data *vma_data;
	struct file_private_data *f_data;

	vma_data = vma->vm_private_data;
	f_data = vma->vm_file->private_data;

	dma_unmap_sg(&f_data->d->dev, vma_data->sg_tbl.sgl,
				vma_data->sg_tbl.nents, DMA_BIDIRECTIONAL);

	sg_free_table(&vma_data->sg_tbl);
}

/* Undo the cpu side of the mapping: pages, page array, then bookkeeping. */
static void cpu_mapping_remove(struct vm_area_struct *vma)
{
	struct vma_private_data *vma_data = vma->vm_private_data;

	cpu_ps_free(vma_data, vma_data->cpu_ps_n);
	kfree(vma_data->cpu_ps);
	kfree(vma_data);
	/* clear the pointer so a stale vma cannot reach freed data */
	vma->vm_private_data = NULL;
}

/* vm open hook: another reference on the vma appeared, bump the refcount. */
static void vma_open(struct vm_area_struct *vma)
{
	struct vma_private_data *vma_data = vma->vm_private_data;

	atomic_inc(&vma_data->refs_n);
}

/*
 * vm close hook: drop one reference; the last closer tears down the gpu
 * aperture mapping, the dma mapping and the cpu pages, in that order.
 */
static void vma_close(struct vm_area_struct *vma)
{
	struct vma_private_data *vma_data = vma->vm_private_data;
	struct file_private_data *f_data = vma->vm_file->private_data;

	if (atomic_dec_and_test(&vma_data->refs_n)) {
		/* we are the last */
		ba_unmap(f_data->d, (void __iomem *)vma->vm_start);
		cpu_bus_mapping_remove(vma);
		cpu_mapping_remove(vma);
	}
}

/*
 * Page fault handler: hand back the pre-allocated cpu page backing the
 * faulting offset, with an extra reference taken for the page table.
 *
 * Returns 0 on success, VM_FAULT_SIGBUS when the vma carries no private
 * data or the fault offset is beyond the pages we allocated.
 */
static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct vma_private_data *vma_data;

	if (vma == NULL || vma->vm_private_data == NULL)
		return VM_FAULT_SIGBUS;
	vma_data = vma->vm_private_data;

	/*
	 * Guard against a fault offset past the cpu_ps array: without this
	 * check an out-of-range pgoff would index past the allocation.
	 */
	if (vmf->pgoff >= vma_data->cpu_ps_n)
		return VM_FAULT_SIGBUS;

	get_page(vma_data->cpu_ps[vmf->pgoff]);
	/*
	 * Trust the code paths selected by the vmf->page method. We check the
	 * userland flags at mmap entry, and that should be roughly fine.
	 * Alternative scenarios: use vm_insert_{pfn,page,mixed} to be sure
	 * to go through the right code paths with the right flags
	 * (suggested by J.Glisse).
	 */
	vmf->page = vma_data->cpu_ps[vmf->pgoff];
	return 0;
}

static struct vm_operations_struct vm_ops = {
	.open = vma_open,
	.close = vma_close,
	.fault = vma_fault
};

/*
 * Allocate the per-vma bookkeeping and one zeroed cpu page per vma page.
 * Installs vm_ops/vm_private_data and the vm flags on success.
 *
 * Returns 0 on success, -EINVAL on a non-zero page offset, -ENOMEM on
 * allocation failure (everything allocated so far is rolled back).
 */
static int cpu_mapping_init_once(struct vm_area_struct *vma)
{
	struct file_private_data *f_data;
	struct vma_private_data *vma_data;
	int err;
	unsigned long cpu_page;

	f_data = vma->vm_file->private_data;

	/* no page offset supported */
	if (vma->vm_pgoff != 0) {
		dev_err(&f_data->d->dev, "aperture mapping:page offset(=0x%016lx) must be zero\n",
								vma->vm_pgoff);
		err = -EINVAL;
		goto err;
	}

	vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
	if (!vma_data) {
		err = -ENOMEM;
		goto err;
	}

	/* we call open later that will increment to 1 the ref count */
	atomic_set(&vma_data->refs_n, 0);

	vma_data->cpu_ps_n = vma_cpu_ps_count(vma);

	/* kcalloc: zeroed and overflow-checked n * size allocation */
	vma_data->cpu_ps = kcalloc(vma_data->cpu_ps_n,
				sizeof(vma_data->cpu_ps[0]), GFP_KERNEL);
	if (!vma_data->cpu_ps) {
		err = -ENOMEM;
		goto err_free_vma_private_data;
	}

	for (cpu_page = 0; cpu_page < vma_data->cpu_ps_n; ++cpu_page) {
		vma_data->cpu_ps[cpu_page] = alloc_page(GFP_USER |
						__GFP_ZERO | __GFP_NOWARN);
		if (!vma_data->cpu_ps[cpu_page]) {
			/*
			 * Pages [0, cpu_page) were allocated; free exactly
			 * those. The previous rollback passed cpu_page - 1
			 * and leaked one page per failure.
			 */
			cpu_ps_free(vma_data, cpu_page);
			err = -ENOMEM;
			goto err_free_cpu_ps_mem;
		}
	}

	vma->vm_ops = &vm_ops;
	vma->vm_private_data = vma_data;
	vma->vm_flags |= VM_MIXEDMAP | VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
	return 0;

err_free_cpu_ps_mem:
	kfree(vma_data->cpu_ps);

err_free_vma_private_data:
	kfree(vma_data);
err:
	return err;
}

/*
 * Build the bus (dma) side of the mapping: wrap the cpu pages into an sg
 * table and dma-map it bidirectionally on the device.
 * Returns 0 on success, -ENOMEM on failure.
 */
static int cpu_bus_mapping_init(struct vm_area_struct *vma)
{
	struct vma_private_data *vma_data = vma->vm_private_data;
	struct file_private_data *f_data;
	unsigned long in_page_of;
	unsigned long len;
	int r;

	/* vm_start is page aligned, so this offset is expected to be 0 */
	in_page_of = vma->vm_start & ~PAGE_MASK;
	len = vma->vm_end - vma->vm_start;

	r = sg_alloc_table_from_pages(&vma_data->sg_tbl,
				&vma_data->cpu_ps[0], vma_data->cpu_ps_n,
						in_page_of, len, GFP_KERNEL);
	if (r != 0)
		return -ENOMEM;

	f_data = vma->vm_file->private_data;

	vma_data->sg_tbl_list_nents = dma_map_sg(&f_data->d->dev,
				vma_data->sg_tbl.sgl, vma_data->sg_tbl.nents,
							DMA_BIDIRECTIONAL);
	if (vma_data->sg_tbl_list_nents == 0) {
		/* dma mapping failed: the sg table must not outlive it */
		sg_free_table(&vma_data->sg_tbl);
		return -ENOMEM;
	}
	return 0;
}

/* Point the gpu aperture (ba) at the dma-mapped scatter/gather list. */
static int gpu_bus_mapping_init(struct vm_area_struct *vma)
{
	struct file_private_data *f_data = vma->vm_file->private_data;
	struct vma_private_data *vma_data = vma->vm_private_data;

	return ba_map(f_data->d, (void __iomem *)vma->vm_start,
				&vma_data->sg_tbl, vma_data->sg_tbl_list_nents);
}

/*
 * mmap entry point: set up the cpu pages, their dma mapping, and the gpu
 * aperture mapping, unwinding in reverse order on failure.
 * TODO: test partial unmmaping?
 */
int fops_mmap(struct file *f, struct vm_area_struct *vma)
{
	int r;

	/* we do *not* want to go through cow mapping code paths */
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	r = cpu_mapping_init_once(vma);
	if (r != 0)
		return r;

	r = cpu_bus_mapping_init(vma);
	if (r != 0)
		goto err_cpu_mapping;

	r = gpu_bus_mapping_init(vma);
	if (r != 0)
		goto err_cpu_bus_mapping;

	/* takes the initial reference (refs_n starts at 0) */
	vma_open(vma);
	return 0;

err_cpu_bus_mapping:
	cpu_bus_mapping_remove(vma);

err_cpu_mapping:
	cpu_mapping_remove(vma);
	return r;
}
